# -*- coding: utf-8 -*-
"""
Scrapy settings for bot_scrapy project
"""

BOT_NAME = 'bot_scrapy'

SPIDER_MODULES = ['bot_scrapy.spiders']
NEWSPIDER_MODULE = 'bot_scrapy.spiders'

# RabbitMQ connection parameters.
# NOTE(review): pika's ConnectionParameters expects a PlainCredentials object,
# not a plain dict — presumably the custom scheduler/pipeline converts this
# dict before connecting; verify against bot_scrapy.scheduler.
RABBITMQ_CONNECTION_PARAMETERS = {
    'host': 'localhost',
    'port': 5672,
    'virtual_host': '/',
    'credentials': {
        'username': 'root',
        'password': 'root'
    },
    'heartbeat': 0,           # 0 disables heartbeats (connection never times out on idle)
    'blocked_connection_timeout': 300
}

# RabbitMQ crawler settings.
# '%(spider)s' is substituted with the spider name to give each spider its own queue.
RABBITMQ_QUEUE_NAME = '%(spider)s:requests'
RABBITMQ_BATCH_SIZE = 16
RABBITMQ_PREFETCH_COUNT = 1

# Crawl responsibly by identifying yourself (and your website) on the user-agent
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'

# Obey robots.txt rules
ROBOTSTXT_OBEY = True

# Configure maximum concurrent requests performing at the same time to the same domain.
# NOTE: when CONCURRENT_REQUESTS_PER_IP is non-zero, Scrapy ignores the
# per-domain limit and throttles per IP instead.
CONCURRENT_REQUESTS_PER_DOMAIN = 16
CONCURRENT_REQUESTS_PER_IP = 16

DOWNLOADER_MIDDLEWARES = {
    # 'scrapy_rabbitmq.middlewares.BotManagerMiddleware': 1,
    # 'bot_scrapy.middlewares.SiteStrategyMiddleware': 790,
    # 'bot_scrapy.middlewares.SeleniumMiddleware': 800,
}

# Custom scheduler that pulls requests from RabbitMQ instead of the in-memory queue.
SCHEDULER = "bot_scrapy.scheduler.Scheduler"

# Configure item pipelines
ITEM_PIPELINES = {
    'bot_scrapy.pipelines.RabbitmqPipeline': 400,
}

# Enable and configure HTTP caching
HTTPCACHE_ENABLED = True
HTTPCACHE_EXPIRATION_SECS = 0   # 0 means cached responses never expire
HTTPCACHE_DIR = 'httpcache'
HTTPCACHE_IGNORE_HTTP_CODES = []
HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

# Set settings whose default value is deprecated to a future-proof value
REQUEST_FINGERPRINTER_IMPLEMENTATION = '2.7'
# BUGFIX: the reactor class lives in twisted.internet.asyncioreactor;
# 'twisted.internet.asyncio' is not an importable module, so the previous
# value made Scrapy fail at startup when installing the reactor.
TWISTED_REACTOR = 'twisted.internet.asyncioreactor.AsyncioSelectorReactor'
FEED_EXPORT_ENCODING = 'utf-8'